import pandas as pd
from sklearn.model_selection import train_test_split

# Load the labelled dataset from disk (path is relative to the working directory).
data = pd.read_csv("../nlp_date/train1.csv")

# Split the dataset with train_test_split: the 'txt' column is used as the
# input and the 'label' column as the target.
X, y = data['txt'], data['label']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of each partition as a quick sanity check.
print(
    "X_train:", X_train.shape,
    "y_train:", y_train.shape,
    "X_test:", X_test.shape,
    "y_test:", y_test.shape,
)

